Expand description
Small library to fetch info about a web page: title, description, language, HTTP info, RSS feeds, OpenGraph, Schema.org, and more.
Usage
use webpage::{Webpage, WebpageOptions};
let info = Webpage::from_url("http://example.org", WebpageOptions::default())
.expect("Could not read from URL");
// the HTTP transfer info
let http = info.http;
// assert_eq!(http.ip, "54.192.129.71".to_string());
assert!(http.headers[0].starts_with("HTTP"));
assert!(http.body.starts_with("<!doctype html>"));
assert_eq!(http.url, "http://example.org/".to_string()); // effective url
assert_eq!(http.content_type, "text/html; charset=UTF-8".to_string());
// the parsed HTML info
let html = info.html;
assert_eq!(html.title, Some("Example Domain".to_string()));
assert_eq!(html.description, None);
assert_eq!(html.opengraph.og_type, "website".to_string());
You can also get HTML info about local data:
use webpage::HTML;
let html = HTML::from_file("index.html", None);
// or let html = HTML::from_string(input, None);
Options
The following configuration options are available:
pub struct WebpageOptions {
allow_insecure: bool,
follow_location: bool,
max_redirections: u32,
timeout: std::time::Duration,
useragent: String,
}
use webpage::{Webpage, WebpageOptions};
let options = WebpageOptions { allow_insecure: true, ..Default::default() };
let info = Webpage::from_url("https://example.org", options).expect("Halp, could not fetch");
Re-exports
pub use crate::html::HTML;
pub use crate::http::HTTP;
pub use crate::opengraph::Opengraph;
pub use crate::opengraph::OpengraphObject;
pub use crate::schema_org::SchemaOrg;
Modules
- `html`: Info from the parsed HTML document
- `http`: Info about the HTTP transfer
- `opengraph`: OpenGraph information
- `schema_org`: Schema.org information
Structs
- `Webpage`: Resulting info for a webpage
- `WebpageOptions`: Configuration options